@aiello/wechat-to-markdown 1.2.12 → 1.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -27,9 +27,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
27
27
  var src_exports = {};
28
28
  __export(src_exports, {
29
29
  Status: () => Status,
30
- default: () => transformHtml2Markdown,
31
30
  getTurnDownService: () => getTurnDownService,
32
- parseHTML: () => parseHTML
31
+ parseHTML: () => parseHTML,
32
+ transformHtml2Markdown: () => transformHtml2Markdown,
33
+ transformUrl2Markdown: () => transformUrl2Markdown
33
34
  });
34
35
  module.exports = __toCommonJS(src_exports);
35
36
  var import_axios = __toESM(require("axios"), 1);
@@ -192,7 +193,15 @@ async function parseHTML(htmlRaw, meta) {
192
193
  }
193
194
  return getError(400 /* Fail */);
194
195
  }
195
- async function transformHtml2Markdown(url, options = {}) {
196
+ async function transformHtml2Markdown(html, url) {
197
+ try {
198
+ return parseHTML(html, { url });
199
+ } catch (err) {
200
+ console.log(err);
201
+ return getError(400 /* Fail */);
202
+ }
203
+ }
204
+ async function transformUrl2Markdown(url, options = {}) {
196
205
  const { axiosConfig = {} } = options;
197
206
  const { headers = {}, ...restConfig } = axiosConfig;
198
207
  const u = new URL(url);
@@ -209,7 +218,7 @@ async function transformHtml2Markdown(url, options = {}) {
209
218
  },
210
219
  ...restConfig
211
220
  });
212
- return parseHTML(res.data, { url: u.href });
221
+ return transformHtml2Markdown(res.data, url);
213
222
  } catch (err) {
214
223
  console.log(err);
215
224
  return getError(400 /* Fail */);
@@ -219,6 +228,8 @@ async function transformHtml2Markdown(url, options = {}) {
219
228
  0 && (module.exports = {
220
229
  Status,
221
230
  getTurnDownService,
222
- parseHTML
231
+ parseHTML,
232
+ transformHtml2Markdown,
233
+ transformUrl2Markdown
223
234
  });
224
235
  //# sourceMappingURL=index.cjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/turndownCode.ts","../src/formatHtml.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport default async function transformHtml2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBAA0C;AAC1C,IAAAA,kBAAqB;;;ACDd,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWC,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,eAAAC,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAAC,QAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,6BAAAC,QAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AH3EA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIA,eAAsB,UAAU,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,QAAI,sBAAK,OAAO;AAEtB,MAAI,QAAQ,EAAE,gBAAgB,EAAE,KAAK;AAErC,UAAQ,MAAM,KAAK,KAAK;AACxB,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO,uBAAoB;AAC/B;AASA,eAAO,uBACH,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,aAAAC,QAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC;AAAA,EAC9C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["import_cheerio","Status","cheerio","turnDownService","TurndownPluginGfm","axios"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/turndownCode.ts","../src/formatHtml.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport async function transformHtml2Markdown(\n html: string,\n /**\n * 这里的 url 是原始的 url,主要是用来映射内部跳转链接\n */\n url: string\n): Promise<TurnDownResult> {\n try {\n return parseHTML(html, { url })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport async function transformUrl2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return transformHtml2Markdown(res.data, url)\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBAA0C;AAC1C,IAAAA,kBAAqB;;;ACDd,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWC,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,eAAAC,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAAC,QAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,6BAAAC,QAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AH3EA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIA,eAAsB,UAAU,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,QAAI,sBAAK,OAAO;AAEtB,MAAI,QAAQ,EAAE,gBAAgB,EAAE,KAAK;AAErC,UAAQ,MAAM,KAAK,KAAK;AACxB,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO,uBAAoB;AAC/B;AAEA,eAAsB,uBAClB,MAIA,KACuB;AACvB,MAAI;AACA,WAAO,UAAU,MAAM,EAAE,IAAI,CAAC;AAAA,EAClC,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;AASA,eAAsB,sBAClB,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,aAAAC,QAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,uBAAuB,IAAI,MAAM,GAAG;AAAA,EAC/C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["import_cheerio","Status","cheerio","turnDownService","TurndownPluginGfm","axios"]}
package/dist/index.d.ts CHANGED
@@ -40,12 +40,17 @@ declare function parseHTML(htmlRaw: string, meta: {
40
40
  content: string;
41
41
  };
42
42
  }>;
43
+ declare function transformHtml2Markdown(html: string,
44
+ /**
45
+ * 这里的 url 是原始的 url,主要是用来映射内部跳转链接
46
+ */
47
+ url: string): Promise<TurnDownResult>;
43
48
  /**
44
49
  * 支持添加代理服务器
45
50
  */
46
51
  interface TransformHtml2MarkdownOptions {
47
52
  axiosConfig?: AxiosRequestConfig;
48
53
  }
49
- declare function transformHtml2Markdown(url: string, options?: TransformHtml2MarkdownOptions): Promise<TurnDownResult>;
54
+ declare function transformUrl2Markdown(url: string, options?: TransformHtml2MarkdownOptions): Promise<TurnDownResult>;
50
55
 
51
- export { Status, TurnDownResult, transformHtml2Markdown as default, getTurnDownService, parseHTML };
56
+ export { Status, TurnDownResult, getTurnDownService, parseHTML, transformHtml2Markdown, transformUrl2Markdown };
package/dist/index.js CHANGED
@@ -159,7 +159,15 @@ async function parseHTML(htmlRaw, meta) {
159
159
  }
160
160
  return getError(400 /* Fail */);
161
161
  }
162
- async function transformHtml2Markdown(url, options = {}) {
162
+ async function transformHtml2Markdown(html, url) {
163
+ try {
164
+ return parseHTML(html, { url });
165
+ } catch (err) {
166
+ console.log(err);
167
+ return getError(400 /* Fail */);
168
+ }
169
+ }
170
+ async function transformUrl2Markdown(url, options = {}) {
163
171
  const { axiosConfig = {} } = options;
164
172
  const { headers = {}, ...restConfig } = axiosConfig;
165
173
  const u = new URL(url);
@@ -176,7 +184,7 @@ async function transformHtml2Markdown(url, options = {}) {
176
184
  },
177
185
  ...restConfig
178
186
  });
179
- return parseHTML(res.data, { url: u.href });
187
+ return transformHtml2Markdown(res.data, url);
180
188
  } catch (err) {
181
189
  console.log(err);
182
190
  return getError(400 /* Fail */);
@@ -184,8 +192,9 @@ async function transformHtml2Markdown(url, options = {}) {
184
192
  }
185
193
  export {
186
194
  Status,
187
- transformHtml2Markdown as default,
188
195
  getTurnDownService,
189
- parseHTML
196
+ parseHTML,
197
+ transformHtml2Markdown,
198
+ transformUrl2Markdown
190
199
  };
191
200
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/turndownCode.ts","../src/formatHtml.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport default async function transformHtml2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],"mappings":";AAAA,OAAO,WAAmC;AAC1C,SAAS,YAAY;;;ACDd,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACRlB,OAAO,qBAAqB;AAC5B,OAAO,uBAAuB;;;ACJ9B,OAAO,aAAa;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,oBAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AH3EA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIA,eAAsB,UAAU,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,KAAK,OAAO;AAEtB,MAAI,QAAQ,EAAE,gBAAgB,EAAE,KAAK;AAErC,UAAQ,MAAM,KAAK,KAAK;AACxB,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO,uBAAoB;AAC/B;AASA,eAAO,uBACH,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC;AAAA,EAC9C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/turndownCode.ts","../src/formatHtml.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport async function transformHtml2Markdown(\n html: string,\n /**\n * 这里的 url 是原始的 url,主要是用来映射内部跳转链接\n */\n url: string\n): Promise<TurnDownResult> {\n try {\n return parseHTML(html, { url })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport async function transformUrl2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return transformHtml2Markdown(res.data, url)\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],"mappings":";AAAA,OAAO,WAAmC;AAC1C,SAAS,YAAY;;;ACDd,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACRlB,OAAO,qBAAqB;AAC5B,OAAO,uBAAuB;;;ACJ9B,OAAO,aAAa;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,oBAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AH3EA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIA,eAAsB,UAAU,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,KAAK,OAAO;AAEtB,MAAI,QAAQ,EAAE,gBAAgB,EAAE,KAAK;AAErC,UAAQ,MAAM,KAAK,KAAK;AACxB,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO,uBAAoB;AAC/B;AAEA,eAAsB,uBAClB,MAIA,KACuB;AACvB,MAAI;AACA,WAAO,UAAU,MAAM,EAAE,IAAI,CAAC;AAAA,EAClC,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;AASA,eAAsB,sBAClB,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,uBAAuB,IAAI,MAAM,GAAG;AAAA,EAC/C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiello/wechat-to-markdown",
3
- "version": "1.2.12",
3
+ "version": "1.2.13",
4
4
  "description": "解析微信文章 URL 为 markdown",
5
5
  "author": "Aiello Chan<aiello.chan@gmail.com>",
6
6
  "keywords": [